{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Demo\n",
    "We'll gather all the functions from the other notebooks and generate questions for an entirely new text.\n",
    "\n",
    "You can also run this notebook independently with any text and see the results. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Common imports\n",
    "import pandas as pd\n",
    "from IPython.display import Markdown, display, clear_output"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pickling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import _pickle as cPickle\n",
    "from pathlib import Path\n",
    "\n",
    "def dumpPickle(fileName, content):\n",
    "    pickleFile = open(fileName, 'wb')\n",
    "    cPickle.dump(content, pickleFile, -1)\n",
    "    pickleFile.close()\n",
    "\n",
    "def loadPickle(fileName):    \n",
    "    file = open(fileName, 'rb')\n",
    "    content = cPickle.load(file)\n",
    "    file.close()\n",
    "    \n",
    "    return content\n",
    "    \n",
    "def pickleExists(fileName):\n",
    "    file = Path(fileName)\n",
    "    \n",
    "    if file.is_file():\n",
    "        return True\n",
    "    \n",
    "    return False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## *Extract all words from plain text and generate it's features*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import spacy\n",
    "from spacy import displacy\n",
    "nlp = spacy.load('en_core_web_sm')\n",
    "\n",
    "#Extract answers and the sentence they are in\n",
    "def extractAnswers(qas, doc):\n",
    "    answers = []\n",
    "\n",
    "    senStart = 0\n",
    "    senId = 0\n",
    "\n",
    "    for sentence in doc.sents:\n",
    "        senLen = len(sentence.text)\n",
    "\n",
    "        for answer in qas:\n",
    "            answerStart = answer['answers'][0]['answer_start']\n",
    "\n",
    "            if (answerStart >= senStart and answerStart < (senStart + senLen)):\n",
    "                answers.append({'sentenceId': senId, 'text': answer['answers'][0]['text']})\n",
    "\n",
    "        senStart += senLen\n",
    "        senId += 1\n",
    "    \n",
    "    return answers\n",
    "\n",
    "#TODO - Clean answers from stopwords?\n",
    "def tokenIsAnswer(token, sentenceId, answers):\n",
    "    for i in range(len(answers)):\n",
    "        if (answers[i]['sentenceId'] == sentenceId):\n",
    "            if (answers[i]['text'] == token):\n",
    "                return True\n",
    "    return False\n",
    "\n",
    "#Save named entities start points\n",
    "\n",
    "def getNEStartIndexs(doc):\n",
    "    neStarts = {}\n",
    "    for ne in doc.ents:\n",
    "        neStarts[ne.start] = ne\n",
    "        \n",
    "    return neStarts \n",
    "\n",
    "def getSentenceStartIndexes(doc):\n",
    "    senStarts = []\n",
    "    \n",
    "    for sentence in doc.sents:\n",
    "        senStarts.append(sentence[0].i)\n",
    "    \n",
    "    return senStarts\n",
    "    \n",
    "def getSentenceForWordPosition(wordPos, senStarts):\n",
    "    for i in range(1, len(senStarts)):\n",
    "        if (wordPos < senStarts[i]):\n",
    "            return i - 1\n",
    "        \n",
    "def addWordsForParagrapgh(newWords, text):\n",
    "    doc = nlp(text)\n",
    "\n",
    "    neStarts = getNEStartIndexs(doc)\n",
    "    senStarts = getSentenceStartIndexes(doc)\n",
    "    \n",
    "    #index of word in spacy doc text\n",
    "    i = 0\n",
    "    \n",
    "    while (i < len(doc)):\n",
    "        #If the token is a start of a Named Entity, add it and push to index to end of the NE\n",
    "        if (i in neStarts):\n",
    "            word = neStarts[i]\n",
    "            #add word\n",
    "            currentSentence = getSentenceForWordPosition(word.start, senStarts)\n",
    "            wordLen = word.end - word.start\n",
    "            shape = ''\n",
    "            for wordIndex in range(word.start, word.end):\n",
    "                shape += (' ' + doc[wordIndex].shape_)\n",
    "\n",
    "            newWords.append([word.text,\n",
    "                            0,\n",
    "                            0,\n",
    "                            currentSentence,\n",
    "                            wordLen,\n",
    "                            word.label_,\n",
    "                            None,\n",
    "                            None,\n",
    "                            None,\n",
    "                            shape])\n",
    "            i = neStarts[i].end - 1\n",
    "        #If not a NE, add the word if it's not a stopword or a non-alpha (not regular letters)\n",
    "        else:\n",
    "            if (doc[i].is_stop == False and doc[i].is_alpha == True):\n",
    "                word = doc[i]\n",
    "\n",
    "                currentSentence = getSentenceForWordPosition(i, senStarts)\n",
    "                wordLen = 1\n",
    "\n",
    "                newWords.append([word.text,\n",
    "                                0,\n",
    "                                0,\n",
    "                                currentSentence,\n",
    "                                wordLen,\n",
    "                                None,\n",
    "                                word.pos_,\n",
    "                                word.tag_,\n",
    "                                word.dep_,\n",
    "                                word.shape_])\n",
    "        i += 1\n",
    "\n",
    "def oneHotEncodeColumns(df):\n",
    "    columnsToEncode = ['NER', 'POS', \"TAG\", 'DEP']\n",
    "\n",
    "    for column in columnsToEncode:\n",
    "        one_hot = pd.get_dummies(df[column])\n",
    "        one_hot = one_hot.add_prefix(column + '_')\n",
    "\n",
    "        df = df.drop(column, axis = 1)\n",
    "        df = df.join(one_hot)\n",
    "    \n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## *Predict whether a word is a keyword* "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generateDf(text):\n",
    "    words = []\n",
    "    addWordsForParagrapgh(words, text)\n",
    "\n",
    "    wordColums = ['text', 'titleId', 'paragrapghId', 'sentenceId','wordCount', 'NER', 'POS', 'TAG', 'DEP','shape']\n",
    "    df = pd.DataFrame(words, columns=wordColums)\n",
    "    \n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prepareDf(df):\n",
    "    #One-hot encoding\n",
    "    wordsDf = oneHotEncodeColumns(df)\n",
    "\n",
    "    #Drop unused columns\n",
    "    columnsToDrop = ['text', 'titleId', 'paragrapghId', 'sentenceId', 'shape']\n",
    "    wordsDf = wordsDf.drop(columnsToDrop, axis = 1)\n",
    "\n",
    "    #Add missing colums \n",
    "    predictorColumns = ['wordCount','NER_CARDINAL','NER_DATE','NER_EVENT','NER_FAC','NER_GPE','NER_LANGUAGE','NER_LAW','NER_LOC','NER_MONEY','NER_NORP','NER_ORDINAL','NER_ORG','NER_PERCENT','NER_PERSON','NER_PRODUCT','NER_QUANTITY','NER_TIME','NER_WORK_OF_ART','POS_ADJ','POS_ADP','POS_ADV','POS_CCONJ','POS_DET','POS_INTJ','POS_NOUN','POS_NUM','POS_PART','POS_PRON','POS_PROPN','POS_PUNCT','POS_SYM','POS_VERB','POS_X','TAG_''','TAG_-LRB-','TAG_.','TAG_ADD','TAG_AFX','TAG_CC','TAG_CD','TAG_DT','TAG_EX','TAG_FW','TAG_IN','TAG_JJ','TAG_JJR','TAG_JJS','TAG_LS','TAG_MD','TAG_NFP','TAG_NN','TAG_NNP','TAG_NNPS','TAG_NNS','TAG_PDT','TAG_POS','TAG_PRP','TAG_PRP$','TAG_RB','TAG_RBR','TAG_RBS','TAG_RP','TAG_SYM','TAG_TO','TAG_UH','TAG_VB','TAG_VBD','TAG_VBG','TAG_VBN','TAG_VBP','TAG_VBZ','TAG_WDT','TAG_WP','TAG_WRB','TAG_XX','DEP_ROOT','DEP_acl','DEP_acomp','DEP_advcl','DEP_advmod','DEP_agent','DEP_amod','DEP_appos','DEP_attr','DEP_aux','DEP_auxpass','DEP_case','DEP_cc','DEP_ccomp','DEP_compound','DEP_conj','DEP_csubj','DEP_csubjpass','DEP_dative','DEP_dep','DEP_det','DEP_dobj','DEP_expl','DEP_intj','DEP_mark','DEP_meta','DEP_neg','DEP_nmod','DEP_npadvmod','DEP_nsubj','DEP_nsubjpass','DEP_nummod','DEP_oprd','DEP_parataxis','DEP_pcomp','DEP_pobj','DEP_poss','DEP_preconj','DEP_predet','DEP_prep','DEP_prt','DEP_punct','DEP_quantmod','DEP_relcl','DEP_xcomp']\n",
    "\n",
    "    for feature in predictorColumns:\n",
    "        if feature not in wordsDf.columns:\n",
    "            wordsDf[feature] = 0\n",
    "    \n",
    "    return wordsDf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predictWords(wordsDf, df):\n",
    "    \n",
    "    predictorPickleName = 'data/pickles/nb-predictor.pkl'\n",
    "    predictor = loadPickle(predictorPickleName)\n",
    "    \n",
    "    y_pred = predictor.predict_proba(wordsDf)\n",
    "\n",
    "    labeledAnswers = []\n",
    "    for i in range(len(y_pred)):\n",
    "        labeledAnswers.append({'word': df.iloc[i]['text'], 'prob': y_pred[i][0]})\n",
    "    \n",
    "    return labeledAnswers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## *Extract questions*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def blankAnswer(firstTokenIndex, lastTokenIndex, sentStart, sentEnd, doc):\n",
    "    leftPartStart = doc[sentStart].idx\n",
    "    leftPartEnd = doc[firstTokenIndex].idx\n",
    "    rightPartStart = doc[lastTokenIndex].idx + len(doc[lastTokenIndex])\n",
    "    rightPartEnd = doc[sentEnd - 1].idx + len(doc[sentEnd - 1])\n",
    "    \n",
    "    question = doc.text[leftPartStart:leftPartEnd] + '_____' + doc.text[rightPartStart:rightPartEnd]\n",
    "    \n",
    "    return question\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def addQuestions(answers, text):\n",
    "    doc = nlp(text)\n",
    "    currAnswerIndex = 0\n",
    "    qaPair = []\n",
    "\n",
    "    #Check wheter each token is the next answer\n",
    "    for sent in doc.sents:\n",
    "        for token in sent:\n",
    "            \n",
    "            #If all the answers have been found, stop looking\n",
    "            if currAnswerIndex >= len(answers):\n",
    "                break\n",
    "            \n",
    "            #In the case where the answer is consisted of more than one token, check the following tokens as well.\n",
    "            answerDoc = nlp(answers[currAnswerIndex]['word'])\n",
    "            answerIsFound = True\n",
    "            \n",
    "            for j in range(len(answerDoc)):\n",
    "                if token.i + j >= len(doc) or doc[token.i + j].text != answerDoc[j].text:\n",
    "                    answerIsFound = False\n",
    "           \n",
    "            #If the current token is corresponding with the answer, add it \n",
    "            if answerIsFound:\n",
    "                question = blankAnswer(token.i, token.i + len(answerDoc) - 1, sent.start, sent.end, doc)\n",
    "                \n",
    "                qaPair.append({'question' : question, 'answer': answers[currAnswerIndex]['word'], 'prob': answers[currAnswerIndex]['prob']})\n",
    "                \n",
    "                currAnswerIndex += 1\n",
    "                \n",
    "    return qaPair"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sortAnswers(qaPairs):\n",
    "    orderedQaPairs = sorted(qaPairs, key=lambda qaPair: qaPair['prob'])\n",
    "    \n",
    "    return orderedQaPairs    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## *Distractors*\n",
    "Taken from the *04. Generating incorrect answers/Incorrect-answers* notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gensim\n",
    "from gensim.test.utils import datapath, get_tmpfile\n",
    "from gensim.models import KeyedVectors\n",
    "\n",
    "glove_file = 'data/embeddings/glove.6B.300d.txt'\n",
    "tmp_file = 'data/embeddings/word2vec-glove.6B.300d.txt'\n",
    "\n",
    "from gensim.scripts.glove2word2vec import glove2word2vec\n",
    "glove2word2vec(glove_file, tmp_file)\n",
    "model = KeyedVectors.load_word2vec_format(tmp_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_distractors(answer, count):\n",
    "    answer = str.lower(answer)\n",
    "    \n",
    "    ##Extracting closest words for the answer. \n",
    "    try:\n",
    "        closestWords = model.most_similar(positive=[answer], topn=count)\n",
    "    except:\n",
    "        #In case the word is not in the vocabulary, or other problem not loading embeddings\n",
    "        return []\n",
    "\n",
    "    #Return count many distractors\n",
    "    distractors = list(map(lambda x: x[0], closestWords))[0:count]\n",
    "    \n",
    "    return distractors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def addDistractors(qaPairs, count):\n",
    "    for qaPair in qaPairs:\n",
    "        distractors = generate_distractors(qaPair['answer'], count)\n",
    "        qaPair['distractors'] = distractors\n",
    "    \n",
    "    return qaPairs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Main function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generateQuestions(text, count):\n",
    "    \n",
    "    # Extract words \n",
    "    df = generateDf(text)\n",
    "    wordsDf = prepareDf(df)\n",
    "    \n",
    "    # Predict \n",
    "    labeledAnswers = predictWords(wordsDf, df)\n",
    "    \n",
    "    # Transform questions\n",
    "    qaPairs = addQuestions(labeledAnswers, text)\n",
    "    \n",
    "    # Pick the best questions\n",
    "    orderedQaPairs = sortAnswers(qaPairs)\n",
    "    \n",
    "    # Generate distractors\n",
    "    questions = addDistractors(orderedQaPairs[:count], 4)\n",
    "    \n",
    "    # Print\n",
    "    for i in range(count):\n",
    "        display(Markdown('### Question ' + str(i + 1) + ':'))\n",
    "        print(questions[i]['question'])\n",
    "\n",
    "        display(Markdown('#### Answer:'))\n",
    "        print(questions[i]['answer'])\n",
    "        \n",
    "        display(Markdown('#### Incorrect answers:'))\n",
    "        for distractor in questions[i]['distractors']:\n",
    "            print(distractor)\n",
    "        \n",
    "        print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "### Question 1:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Oxygen is a chemical element with symbol _____ and atomic number 8.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "O\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "y\n",
      "n\n",
      "te\n",
      "[\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Question 2:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "It is a member of the chalcogen group on the periodic table, a highly reactive nonmetal, and an _____ agent that readily forms oxides with most elements as well as with other compounds.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "oxidizing\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "oxidising\n",
      "oxidized\n",
      "oxidize\n",
      "permanganate\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Question 3:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Oxygen is a _____ element with symbol O and atomic number 8.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chemical\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chemicals\n",
      "biological\n",
      "toxic\n",
      "compounds\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Question 4:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Oxygen is a chemical element with symbol O and _____ number 8.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "atomic\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nuclear\n",
      "iaea\n",
      "weapons\n",
      "plutonium\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Question 5:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "It is a member of the chalcogen group on the _____ table, a highly reactive nonmetal, and an oxidizing agent that readily forms oxides with most elements as well as with other compounds.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "periodic\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frequent\n",
      "periodically\n",
      "continual\n",
      "continuous\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Question 6:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "It is a member of the chalcogen group on the periodic table, a highly _____ nonmetal, and an oxidizing agent that readily forms oxides with most elements as well as with other compounds.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "reactive\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "reactivity\n",
      "chemically\n",
      "proactive\n",
      "ions\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Question 7:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "By mass, oxygen is the third-most _____ element in the universe, after hydrogen and helium.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "abundant\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "plentiful\n",
      "abundance\n",
      "ample\n",
      "scarce\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Question 8:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "At _____ temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic gas with the formula O2.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "standard\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "standards\n",
      "definition\n",
      "example\n",
      "basic\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Question 9:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless _____ gas with the formula O2.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "diatomic\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "polyatomic\n",
      "halogens\n",
      "monatomic\n",
      "fluorine\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "### Question 10:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_____ oxygen gas constitutes 20.8% of the Earth's atmosphere.\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Answer:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Diatomic\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "#### Incorrect answers:"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "polyatomic\n",
      "halogens\n",
      "monatomic\n",
      "fluorine\n",
      "\n"
     ]
    }
   ],
   "source": [
    "text = \"Oxygen is a chemical element with symbol O and atomic number 8. It is a member of the chalcogen group on the periodic table, a highly reactive nonmetal, and an oxidizing agent that readily forms oxides with most elements as well as with other compounds. By mass, oxygen is the third-most abundant element in the universe, after hydrogen and helium. At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic gas with the formula O2. Diatomic oxygen gas constitutes 20.8% of the Earth's atmosphere. As compounds including oxides, the element makes up almost half of the Earth's crust.\"\n",
    "\n",
    "generateQuestions(text, 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
